1 Setup

Set the working dir

setwd("/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR")

Libraries

suppressPackageStartupMessages(library(DESeq2))
suppressPackageStartupMessages(library(gplots))
suppressPackageStartupMessages(library(LSD))
suppressPackageStartupMessages(library(parallel))
suppressPackageStartupMessages(library(pander))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(scatterplot3d))
suppressPackageStartupMessages(library(tximport))
suppressPackageStartupMessages(library(vsn))
suppressPackageStartupMessages(library(VennDiagram))
suppressMessages(source("~/Git/UPSCb/src/R/volcanoPlot.R"))

Source some helper functions

source("~/Git/UPSCb/src/R/plot.multidensity.R")
source("~/Git/UPSCb/src/R/featureSelection.R")

Create a palette

pal <- brewer.pal(9,"Paired")

Register the default plot margin

mar <- par("mar")

2 Get the count data

2.1 Raw data

2.1.1 Loading

Read the sample information

kg <- read.table("/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/analysis_Tom/batch2-unormalised-gene-expression_data.csv", header = TRUE)
load("/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/analysis_Tom/batch2_counts.rda")

summarise to genes

#tx2gene <- data.frame(TXID=rownames(tx$counts),
#                      GENEID=sub("\\.[0-9]+","",rownames(tx$counts)))

#gx <- summarizeToGene(tx,tx2gene=tx2gene)

#kg <- round(gx$counts) 

Sanity check

stopifnot(all(colnames(kg) == samples$SciLifeID))

2.1.2 Preliminary validations

2.1.2.1 Check for the genes that are never expressed

sel <- rowSums(kg) == 0 
# str(sel)= logical vector
# sums the counts for one gene in each samples
# show TRUE or FALSE for each gene if the gene is never expressed (has 0 counts in every time samples)

2.1.2.2 Check for the number of genes never expressed

sprintf("%s%% (%s) of %s genes are not expressed",
        round(sum(sel) * 100/ nrow(kg),digits=1),
        sum(sel),
        nrow(kg))
## [1] "19.6% (6324) of 32310 genes are not expressed"
#in our experiment the number of genes and transcript are the same (one transcript for one gene)

2.1.2.3 Display the samples mean raw counts distribution

i.e. the mean raw count of every gene across samples is calculated and displayed on a log10 scale.

plot(density(log10(rowMeans(kg))),col=pal[1],
     main="gene mean raw counts distribution",
     xlab="mean raw counts (log10)")

#density=likelihood, chance
# plot the likelihood that the mean of genes has this mean of raw counts 
# give an idea of the read depth, here the majority of gene maps ~30 reads (1.5 log10) 

2.1.2.4 Display all samples raw counts distribution

cols <- sample(pal,nlevels(samples$SampleName),replace = TRUE)
plot.multidensity(mclapply(1:ncol(kg),function(k){log10(kg)[,k]},mc.cores=16L),
                  col=cols[as.integer(samples$User.ID)],
                  legend.x="topright",
                  legend=levels(samples$User.ID),
                  legend.col=cols,
                  legend.lwd=2,
                  main="samples raw counts distribution",
                  xlab="per gene raw counts (log10)")

3 Verification of the sample distribution by a multivariate approach

3.1 Use of a Multidimensional scaling plot of distances

plotMDS(kg, cex=0.7)

3.2 PCA analysis

3.2.1 Establishment of the PCA

conditions1 <- factor(paste(samples$AZD,samples$Nutrition))
pch<-as.integer(factor(samples$Timepoint))+14
pc <- prcomp(t(kg))

percent <- round(summary(pc)$importance[2,]*100);percent
##  PC1  PC2  PC3  PC4  PC5  PC6  PC7  PC8  PC9 PC10 PC11 PC12 PC13 PC14 PC15 
##   48   26   16    6    3    1    0    0    0    0    0    0    0    0    0 
## PC16 PC17 PC18 PC19 PC20 PC21 PC22 PC23 PC24 PC25 PC26 PC27 PC28 PC29 
##    0    0    0    0    0    0    0    0    0    0    0    0    0    0

3.2.2 Verification of working in Batch2 only

factor(substr(sub(".*_","",samples$SciLifeID),1,1))
##  [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## Levels: 2
plot(0,0,type="n",xlim=range(pc$x[,1]),ylim=range(pc$x[,2]))
text(pc$x[,1],pc$x[,2],cex=.7,substr(sub(".*_","",samples$SciLifeID),1,1),adj = c(1,-1))

Looking for conditions coming from different batches

samples[samples$AZD=="DMSO" & samples$Nutrition == "NP",]
##     SciLifeID SampleName Timepoint Nutrition  AZD
## 13 P11554_213       I6-7         6        NP DMSO
## 14 P11554_214       I6-8         6        NP DMSO
## 15 P11554_215       I6-9         6        NP DMSO
samples[samples$AZD=="DMSO" & samples$Nutrition == "NPS",]
##    SciLifeID SampleName Timepoint Nutrition  AZD
## 7 P11554_207       I6-1         6       NPS DMSO
## 8 P11554_208       I6-2         6       NPS DMSO
## 9 P11554_209       I6-3         6       NPS DMSO
samples[samples$AZD=="DMSO" & samples$Nutrition == "PKS",]
##     SciLifeID SampleName Timepoint Nutrition  AZD
## 19 P11554_219      I6-13         6       PKS DMSO
## 20 P11554_220      I6-14         6       PKS DMSO
## 21 P11554_221      I6-15         6       PKS DMSO
samples[samples$AZD=="DMSO" & samples$Nutrition == "NS",]
##     SciLifeID SampleName Timepoint Nutrition  AZD
## 25 P11554_225      I6-19         6        NS DMSO
## 26 P11554_226      I6-20         6        NS DMSO
## 27 P11554_227      I6-21         6        NS DMSO
samples[samples$AZD=="AZD" & samples$Nutrition == "NP",]
##     SciLifeID SampleName Timepoint Nutrition AZD
## 16 P11554_216      I6-10         6        NP AZD
## 17 P11554_217      I6-11         6        NP AZD
## 18 P11554_218      I6-12         6        NP AZD
samples[samples$AZD=="AZD" & samples$Nutrition == "NPS",]
##     SciLifeID SampleName Timepoint Nutrition AZD
## 10 P11554_210       J6-4         6       NPS AZD
## 11 P11554_211       I6-5         6       NPS AZD
## 12 P11554_212       I6-6         6       NPS AZD
samples[samples$AZD=="AZD" & samples$Nutrition == "PKS",]
##     SciLifeID SampleName Timepoint Nutrition AZD
## 22 P11554_222      I6-16         6       PKS AZD
## 23 P11554_223      I6-17         6       PKS AZD
## 24 P11554_224      I6-18         6       PKS AZD
samples[samples$AZD=="AZD" & samples$Nutrition == "NS",]
##     SciLifeID SampleName Timepoint Nutrition AZD
## 28 P11554_228      I6-22         6        NS AZD
## 29 P11554_229      I6-23         6        NS AZD
## 30 P11554_230      I6-24         6        NS AZD
samples[samples$AZD=="0",]
##    SciLifeID SampleName Timepoint Nutrition AZD
## 2 P11554_202       J0-7         0        T0   0
## 3 P11554_203      J0-23         0        T0   0
## 4 P11554_204       J0-5         0        T0   0
## 5 P11554_205      J0-21         0        T0   0
## 6 P11554_206      J0-11         0        T0   0

3.2.3 PCA for biological meaning

plot(pc$x[,1],
     pc$x[,2],
     xlab=paste("Comp. 1 (",percent[1],"%)",sep=""),
     ylab=paste("Comp. 2 (",percent[2],"%)",sep=""),
     col=pal[as.integer(factor(conditions1))],
     pch=pch)
legend("bottomleft",pch=19,
       col=pal[as.integer(levels(factor(as.integer(conditions1))))],
       legend=levels(factor(conditions1)))
legend("topleft",pch=as.integer(levels(factor(pch))),
       col="black",
       legend=levels(factor(samples$Timepoint)))

mar=c(5.1,4.1,4.1,2.1)
scatterplot3d(pc$x[,1],
              pc$x[,2],
              pc$x[,3],
              xlab=paste("Comp. 1 (",percent[1],"%)",sep=""),
              ylab=paste("Comp. 2 (",percent[2],"%)",sep=""),
              zlab=paste("Comp. 3 (",percent[3],"%)",sep=""),
              color=pal[as.integer(factor(conditions1))],
              pch=pch)
legend("topleft",pch=19,
       col=pal[as.integer(levels(factor(as.integer(conditions1))))],
       legend=levels(factor(conditions1)))
legend("topright",pch=as.integer(levels(factor(pch))),
       col="black",
       legend=levels(factor(samples$Timepoint)))

par(mar=c(5.1, 4.1, 4.1, 2.1))

4 Biological analysis

For visualization, the data is submitted to a variance stabilization transformation using DESeq2. The dispersion is estimated independently of the sample tissue and replicate

Create the dds object, without giving any prior on the design

#

4.1 Data normalisation to T0

4.1.1 Normalization procedure

conditions <- factor(paste(samples$Timepoint,".",samples$Nutrition,".",samples$AZD))

conditions <- relevel(conditions,ref = "0 . T0 . 0")

dds.kg <- DESeqDataSetFromMatrix(
    countData = kg,
    colData = data.frame(condition=conditions),
    design = ~ condition)
## converting counts to integer mode
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]

Check the size factors (i.e. the sequencing library size effect)

dds.kg <- estimateSizeFactors(dds.kg)
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]
sizes.kg <- sizeFactors(dds.kg)
# names(sizes.kt) <- colnames(kt)
# not useful sizes.kt already have the right names
pander(sizes.kg)
Table continues below
P11554_202 P11554_203 P11554_204 P11554_205 P11554_206 P11554_207
0.6397 0.8337 0.6737 0.8355 0.9246 1.189
Table continues below
P11554_208 P11554_209 P11554_210 P11554_211 P11554_212 P11554_213
0.9961 0.8304 0.9225 1.362 1.092 0.9275
Table continues below
P11554_214 P11554_215 P11554_216 P11554_217 P11554_218 P11554_219
1.107 1.052 1.063 1.126 1.223 1.402
Table continues below
P11554_220 P11554_221 P11554_222 P11554_223 P11554_224 P11554_225
1.072 1.185 1.325 1.041 1.119 0.6975
P11554_226 P11554_227 P11554_228 P11554_229 P11554_230
1.035 1.072 1.255 0.7207 0.8754
boxplot(sizes.kg, main="Sequencing libraries size factor")
abline(h=1,lty=2,col="gray")

# There is a small variation between the mediane and the value 1.0

4.1.1.1 Variance Stabilising Transformation

4.1.1.1.1 Blind
system.time(vsd.kg <- varianceStabilizingTransformation(dds.kg, blind=TRUE))
##    user  system elapsed 
##  14.900  10.604  10.993
vst_blind <- assay(vsd.kg)

#colnames(vst.kt) <- colnames(kt)
#already have the right names
vst_blind <- vst_blind - min(vst_blind) 
# substracts from all values the smaller value, so the smaller value is now 0

Validate the VST

meanSdPlot(vst_blind[rowSums(vst_blind)>0,]) #mean variance stabilized between 0.5 and 1

meanSdPlot(log2(kg+1)) #without Variance Stabilizing Transformation: variation around 1

meanSdPlot(log2(counts(dds.kg,normalized=TRUE)+1)) #normalized read depth: variance is not stabilized ("curvy")

#VST validated: mean variance stabilized around 0.5


write.csv(vst_blind,"/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/Tom-normalizedToT0-data-vst-blind.csv")

#save(kg, vst_blind, file="vst_blind.rda")
save(kg, vst_blind, file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/vst_blind.rda")
4.1.1.1.2 vst aware (calculated accordingly to samplingDates)

Calculate vst aware with DESeq2 taking the date as a parameter

#
4.1.1.1.3 Model-aware
system.time(vsd2 <- varianceStabilizingTransformation(dds.kg, blind=FALSE))
##    user  system elapsed 
##  21.792   7.600  19.541
vst_aware <- assay(vsd2)

#colnames(vst.kt) <- colnames(kt)
#already have the right names
vst_aware <- vst_aware - min(vst_aware) 

# substracts from all values the smaller value, so the smaller value is now 0

Validate the VST

meanSdPlot(vst_aware[rowSums(vst_aware)>0,]) #mean variance stabilized between 0 and 0.5

meanSdPlot(log2(kg+1)) #without Variance Stabilizing Transformation: variation around 1

meanSdPlot(log2(counts(dds.kg,normalized=TRUE)+1)) #normalized read depth: variance is not stabilized ("curvy")

# vst_aware validated: mean variance is way lower when vst is calculated "aware of the date factor" compare to before when vst was calculated "blind"
# stabilized around 0.2 instead of 0.5 when vst was blind


write.csv(vst_aware,"/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/Tom-normalized-data-vst-aware.csv")
#save(kg, vst_aware, file="vst_aware.rda")
#load("vst_aware.rda")

Differential expression

dds.kg <- DESeq(dds.kg)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]
## final dispersion estimates
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]
## fitting model and testing

which results

resultsNames(dds.kg)
## [1] "Intercept"                             
## [2] "condition_6...NP...AZD_vs_0...T0...0"  
## [3] "condition_6...NP...DMSO_vs_0...T0...0" 
## [4] "condition_6...NPS...AZD_vs_0...T0...0" 
## [5] "condition_6...NPS...DMSO_vs_0...T0...0"
## [6] "condition_6...NS...AZD_vs_0...T0...0"  
## [7] "condition_6...NS...DMSO_vs_0...T0...0" 
## [8] "condition_6...PKS...AZD_vs_0...T0...0" 
## [9] "condition_6...PKS...DMSO_vs_0...T0...0"

4.1.2 Analysis of 6hrs timepoint

4.1.2.1 Effect of phosphorus starvation

res <- results(dds.kg,name="condition_6...NS...DMSO_vs_0...T0...0")

alpha=0.01
4.1.2.1.1 The log2 fold-change range is also rather tight.
plotMA(res)

4.1.2.1.2 Plot the log odds vs. log2 fold change

The volcano plot is very flat, cross-validating the results of the MA plot

volcanoPlot(res,alpha=alpha)

4.1.2.1.3 Plot the adjusted p-value histogram

Which is as expected, over-enriched for adjusted p-values of 1

hist(res$padj,breaks=seq(0,1,.01))

# select genes that are significant
alpha=0.01
l2fc = 0.5
sel <- ! is.na(res$padj) & res$padj <= alpha & abs(res$log2FoldChange) >= l2fc

# list of DE genes that are significant
six_NS_DMSO_vs_T0 <- rownames(res[sel,])
4.1.2.1.4 DE genes data export
write.csv(res,file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/6_NS_DMSO_vs_T0-DEgenes_all.csv")
write.csv(res[sel,],file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/6_NS_DMSO_vs_T0-DEgenes__significant.csv")

4.1.2.2 Effect of full medium resupply

res <- results(dds.kg,name="condition_6...NPS...DMSO_vs_0...T0...0")

alpha=0.01
4.1.2.2.1 The log2 fold-change range is also rather tight.
plotMA(res)

4.1.2.2.2 Plot the log odds vs. log2 fold change

The volcano plot is very flat, cross-validating the results of the MA plot

volcanoPlot(res,alpha=alpha)

4.1.2.2.3 Plot the adjusted p-value histogram

Which is as expected, over-enriched for adjusted p-values of 1

hist(res$padj,breaks=seq(0,1,.01))

# select genes that are significant
alpha=0.01
l2fc = 0.5
sel <- ! is.na(res$padj) & res$padj <= alpha & abs(res$log2FoldChange) >= l2fc

# list of DE genes that are significant
six_NPS_DMSO_vs_T0 <- rownames(res[sel,])
4.1.2.2.4 DE genes data export
write.csv(res,file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/6_NPS_DMSO_vs_T0-DEgenes_all.csv")
write.csv(res[sel,],file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/6_NPS_DMSO_vs_T0-DEgenes__significant.csv")

4.1.2.3 Effect of phosphorus starvation and AZD treatment

res <- results(dds.kg,name="condition_6...NS...AZD_vs_0...T0...0")

alpha=0.01
4.1.2.3.1 The log2 fold-change range is also rather tight.
plotMA(res)

4.1.2.3.2 Plot the log odds vs. log2 fold change

The volcano plot is very flat, cross-validating the results of the MA plot

volcanoPlot(res,alpha=alpha)

4.1.2.3.3 Plot the adjusted p-value histogram

Which is as expected, over-enriched for adjusted p-values of 1

hist(res$padj,breaks=seq(0,1,.01))

# select genes that are significant
alpha=0.01
l2fc = 0.5
sel <- ! is.na(res$padj) & res$padj <= alpha & abs(res$log2FoldChange) >= l2fc

# list of DE genes that are significant
six_NS_AZD_vs_T0 <- rownames(res[sel,])
4.1.2.3.4 DE genes data export
write.csv(res,file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/6_NS_AZD_vs_T0-DEgenes_all.csv")
write.csv(res[sel,],file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/6_NS_AZD_vs_T0-DEgenes__significant.csv")

4.1.2.4 Effect of AZD treatment

res <- results(dds.kg,name="condition_6...NPS...AZD_vs_0...T0...0")

alpha=0.01
4.1.2.4.1 The log2 fold-change range is also rather tight.
plotMA(res)

4.1.2.4.2 Plot the log odds vs. log2 fold change

The volcano plot is very flat, cross-validating the results of the MA plot

volcanoPlot(res,alpha=alpha)

4.1.2.4.3 Plot the adjusted p-value histogram

Which is as expected, over-enriched for adjusted p-values of 1

hist(res$padj,breaks=seq(0,1,.01))

# select genes that are significant
alpha=0.01
l2fc = 0.5
sel <- ! is.na(res$padj) & res$padj <= alpha & abs(res$log2FoldChange) >= l2fc

# list of DE genes that are significant
six_NPS_AZD_vs_T0 <- rownames(res[sel,])
4.1.2.4.4 DE genes data export
write.csv(res,file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/6_NPS_AZD_vs_T0-DEgenes_all.csv")
write.csv(res[sel,],file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/6_NPS_AZD_vs_T0-DEgenes__significant.csv")

4.1.2.5 Effect of nitrogen starvation

res <- results(dds.kg,name="condition_6...PKS...DMSO_vs_0...T0...0")

alpha=0.01
4.1.2.5.1 The log2 fold-change range is also rather tight.
plotMA(res)

4.1.2.5.2 Plot the log odds vs. log2 fold change

The volcano plot is very flat, cross-validating the results of the MA plot

volcanoPlot(res,alpha=alpha)

4.1.2.5.3 Plot the adjusted p-value histogram

Which is as expected, over-enriched for adjusted p-values of 1

hist(res$padj,breaks=seq(0,1,.01))

# select genes that are significant
alpha=0.01
l2fc = 0.5
sel <- ! is.na(res$padj) & res$padj <= alpha & abs(res$log2FoldChange) >= l2fc

# list of DE genes that are significant
six_PKS_DMSO_vs_T0 <- rownames(res[sel,])
4.1.2.5.4 DE genes data export
write.csv(res,file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/6_PKS_DMSO_vs_T0-DEgenes_all.csv")
write.csv(res[sel,],file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/6_PKS_DMSO_vs_T0-DEgenes__significant.csv")

4.1.2.6 Effect of sucrose starvation

res <- results(dds.kg,name="condition_6...NP...DMSO_vs_0...T0...0")

alpha=0.01
4.1.2.6.1 The log2 fold-change range is also rather tight.
plotMA(res)

4.1.2.6.2 Plot the log odds vs. log2 fold change

The volcano plot is very flat, cross-validating the results of the MA plot

volcanoPlot(res,alpha=alpha)

4.1.2.6.3 Plot the adjusted p-value histogram

Which is as expected, over-enriched for adjusted p-values of 1

hist(res$padj,breaks=seq(0,1,.01))

# select genes that are significant
alpha=0.01
l2fc = 0.5
sel <- ! is.na(res$padj) & res$padj <= alpha & abs(res$log2FoldChange) >= l2fc

# list of DE genes that are significant
six_NP_DMSO_vs_T0 <- rownames(res[sel,])
4.1.2.6.4 DE genes data export
write.csv(res,file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/6_NP_DMSO_vs_T0-DEgenes_all.csv")
write.csv(res[sel,],file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/6_NP_DMSO_vs_T0-DEgenes__significant.csv")

4.1.2.7 Effect of nitrogen starvation and AZD treatment

res <- results(dds.kg,name="condition_6...PKS...AZD_vs_0...T0...0")

alpha=0.01
4.1.2.7.1 The log2 fold-change range is also rather tight.
plotMA(res)

4.1.2.7.2 Plot the log odds vs. log2 fold change

The volcano plot is very flat, cross-validating the results of the MA plot

volcanoPlot(res,alpha=alpha)

4.1.2.7.3 Plot the adjusted p-value histogram

Which is as expected, over-enriched for adjusted p-values of 1

hist(res$padj,breaks=seq(0,1,.01))

# select genes that are significant
alpha=0.01
l2fc = 0.5
sel <- ! is.na(res$padj) & res$padj <= alpha & abs(res$log2FoldChange) >= l2fc

# list of DE genes that are significant
six_PKS_AZD_vs_T0 <- rownames(res[sel,])
4.1.2.7.4 DE genes data export
write.csv(res,file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/6_PKS_AZD_vs_T0-DEgenes_all.csv")
write.csv(res[sel,],file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/6_PKS_AZD_vs_T0-DEgenes__significant.csv")

4.1.2.8 Effect of sucrose starvation and AZD treatment

res <- results(dds.kg,name="condition_6...NP...AZD_vs_0...T0...0")

alpha=0.01
4.1.2.8.1 The log2 fold-change range is also rather tight.
plotMA(res)

4.1.2.8.2 Plot the log odds vs. log2 fold change

The volcano plot is very flat, cross-validating the results of the MA plot

volcanoPlot(res,alpha=alpha)

4.1.2.8.3 Plot the adjusted p-value histogram

Which is as expected, over-enriched for adjusted p-values of 1

hist(res$padj,breaks=seq(0,1,.01))

# select genes that are significant
alpha=0.01
l2fc = 0.5
sel <- ! is.na(res$padj) & res$padj <= alpha & abs(res$log2FoldChange) >= l2fc

# list of DE genes that are significant
six_NP_AZD_vs_T0 <- rownames(res[sel,])
4.1.2.8.4 DE genes data export
write.csv(res,file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/6_NP_AZD_vs_T0-DEgenes_all.csv")
write.csv(res[sel,],file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/6_NP_AZD_vs_T0-DEgenes__significant.csv")

4.1.2.9 Venn diagrams

4.1.2.9.1 VennDiagram of +/- Phoshorus +/- AZD
SixH <- VennDiagram::calculate.overlap(list(six_NS_DMSO_vs_T0, six_NS_AZD_vs_T0,six_NPS_AZD_vs_T0,six_NPS_DMSO_vs_T0))
grid.newpage()
svg("Venn1.svg", width = 7, height = 7)
grid.draw(venn.diagram(list(six_NS_DMSO_vs_T0, six_NS_AZD_vs_T0,six_NPS_AZD_vs_T0,six_NPS_DMSO_vs_T0),
                       filename=NULL,
                       col=pal[1:4],
                       category.names=c("PStarv","PStarv_AZD","AZD","NoStarv")))
dev.off()
## png 
##   2
grid.draw(venn.diagram(list(six_NS_DMSO_vs_T0, six_NS_AZD_vs_T0,six_NPS_AZD_vs_T0,six_NPS_DMSO_vs_T0),
                       filename=NULL,
                       col=pal[1:4],
                       category.names=c("PStarv","PStarv_AZD","AZD","NoStarv")))

4.1.2.9.2 VennDiagram of all nutrition in DMSO
SixH <- VennDiagram::calculate.overlap(list(six_NS_DMSO_vs_T0,
                                            six_NPS_DMSO_vs_T0,
                                            six_PKS_DMSO_vs_T0,
                                            six_NP_DMSO_vs_T0))
grid.newpage()
svg("Venn2.svg", width = 7, height = 7)
grid.draw(venn.diagram(list(six_NS_DMSO_vs_T0,
                            six_NPS_DMSO_vs_T0,
                            six_PKS_DMSO_vs_T0,
                            six_NP_DMSO_vs_T0),
                       filename=NULL,
                       col=pal[1:4],
                       category.names=c("Pstarv","full","Nstarv","Sstarv")))
dev.off()
## png 
##   2
grid.draw(venn.diagram(list(six_NS_DMSO_vs_T0,
                            six_NPS_DMSO_vs_T0,
                            six_PKS_DMSO_vs_T0,
                            six_NP_DMSO_vs_T0),
                       filename=NULL,
                       col=pal[1:4],
                       category.names=c("Pstarv","full","Nstarv","Sstarv")))

4.1.2.9.3 VennDiagram of all nutrition in AZD
SixH <- VennDiagram::calculate.overlap(list(six_NS_AZD_vs_T0,
                                            six_NPS_AZD_vs_T0,
                                            six_PKS_AZD_vs_T0,
                                            six_NP_AZD_vs_T0))
grid.newpage()
svg("Venn3.svg", width = 7, height = 7)
grid.draw(venn.diagram(list(six_NS_AZD_vs_T0,
                            six_NPS_AZD_vs_T0,
                            six_PKS_AZD_vs_T0,
                            six_NP_AZD_vs_T0),
                       filename=NULL,
                       col=pal[1:4],
                       category.names=c("Pstarv & AZD","full & AZD","Nstarv & AZD","Sstarv & AZD")))
dev.off()
## png 
##   2
grid.draw(venn.diagram(list(six_NS_AZD_vs_T0,
                            six_NPS_AZD_vs_T0,
                            six_PKS_AZD_vs_T0,
                            six_NP_AZD_vs_T0),
                       filename=NULL,
                       col=pal[1:4],
                       category.names=c("Pstarv & AZD","full & AZD","Nstarv & AZD","Sstarv & AZD")))

4.2 Data normalisation to NPS_DMSO

conditions <- factor(paste(samples$Timepoint,".",samples$Nutrition,".",samples$AZD))

conditions <- relevel(conditions,ref = "6 . NPS . DMSO")

dds.kg <- DESeqDataSetFromMatrix(
    countData = kg,
    colData = data.frame(condition=conditions),
    design = ~ condition)
## converting counts to integer mode
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]

Check the size factors (i.e. the sequencing library size effect)

dds.kg <- estimateSizeFactors(dds.kg)
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]
sizes.kg <- sizeFactors(dds.kg)
# names(sizes.kt) <- colnames(kt)
# not useful sizes.kt already have the right names
pander(sizes.kg)
Table continues below
P11554_202 P11554_203 P11554_204 P11554_205 P11554_206 P11554_207
0.6397 0.8337 0.6737 0.8355 0.9246 1.189
Table continues below
P11554_208 P11554_209 P11554_210 P11554_211 P11554_212 P11554_213
0.9961 0.8304 0.9225 1.362 1.092 0.9275
Table continues below
P11554_214 P11554_215 P11554_216 P11554_217 P11554_218 P11554_219
1.107 1.052 1.063 1.126 1.223 1.402
Table continues below
P11554_220 P11554_221 P11554_222 P11554_223 P11554_224 P11554_225
1.072 1.185 1.325 1.041 1.119 0.6975
P11554_226 P11554_227 P11554_228 P11554_229 P11554_230
1.035 1.072 1.255 0.7207 0.8754
boxplot(sizes.kg, main="Sequencing libraries size factor")
abline(h=1,lty=2,col="gray")

# There is a small variation between the mediane and the value 1.0

4.2.1 Variance Stabilising Transformation

4.2.1.1 Blind

system.time(vsd.kg <- varianceStabilizingTransformation(dds.kg, blind=TRUE))
##    user  system elapsed 
##  13.732   9.524  10.138
vst_blind <- assay(vsd.kg)

#colnames(vst.kt) <- colnames(kt)
#already have the right names
vst_blind <- vst_blind - min(vst_blind) 
# substracts from all values the smaller value, so the smaller value is now 0

Validate the VST

meanSdPlot(vst_blind[rowSums(vst_blind)>0,]) #mean variance stabilized between 0.5 and 1

meanSdPlot(log2(kg+1)) #without Variance Stabilizing Transformation: variation around 1

meanSdPlot(log2(counts(dds.kg,normalized=TRUE)+1)) #normalized read depth: variance is not stabilized ("curvy")

#VST validated: mean variance stabilized around 0.5


write.csv(vst_blind,"/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/Tom-normalizedTo6_NPS_DMSO-data-vst-blind.csv")

#save(kg, vst_blind, file="vst_blind.rda")
save(kg, vst_blind, file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/vst_blind.rda")

4.2.1.2 vst aware (calculated accordingly to samplingDates)

Calculate vst aware with DESeq2 taking the date as a parameter

#

4.2.1.3 Model-aware

system.time(vsd2 <- varianceStabilizingTransformation(dds.kg, blind=FALSE))
##    user  system elapsed 
##  20.404   7.472  18.218
vst_aware <- assay(vsd2)

#colnames(vst.kt) <- colnames(kt)
#already have the right names
vst_aware <- vst_aware - min(vst_aware) 

# substracts from all values the smaller value, so the smaller value is now 0

Validate the VST

meanSdPlot(vst_aware[rowSums(vst_aware)>0,]) #mean variance stabilized between 0 and 0.5

meanSdPlot(log2(kg+1)) #without Variance Stabilizing Transformation: variation around 1

meanSdPlot(log2(counts(dds.kg,normalized=TRUE)+1)) #normalized read depth: variance is not stabilized ("curvy")

# vst_aware validated: mean variance is way lower when vst is calculated "aware of the date factor" compare to before when vst was calculated "blind"
# stabilized around 0.2 instead of 0.5 when vst was blind


write.csv(vst_aware,"/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/Tom-normalizedTo_6_NPS_DMSO-data-vst-aware.csv")
#save(kg, vst_aware, file="vst_aware.rda")
#load("vst_aware.rda")

Differential expression

dds.kg <- DESeq(dds.kg)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]
## final dispersion estimates
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]
## fitting model and testing

which results

resultsNames(dds.kg)
## [1] "Intercept"                                 
## [2] "condition_0...T0...0_vs_6...NPS...DMSO"    
## [3] "condition_6...NP...AZD_vs_6...NPS...DMSO"  
## [4] "condition_6...NP...DMSO_vs_6...NPS...DMSO" 
## [5] "condition_6...NPS...AZD_vs_6...NPS...DMSO" 
## [6] "condition_6...NS...AZD_vs_6...NPS...DMSO"  
## [7] "condition_6...NS...DMSO_vs_6...NPS...DMSO" 
## [8] "condition_6...PKS...AZD_vs_6...NPS...DMSO" 
## [9] "condition_6...PKS...DMSO_vs_6...NPS...DMSO"

4.2.2 Effect of AZD treatment in NPS

res <- results(dds.kg,name="condition_6...NPS...AZD_vs_6...NPS...DMSO")

alpha=0.01

4.2.2.1 The log2 fold-change range is also rather tight.

plotMA(res)

4.2.2.2 Plot the log odds vs. log2 fold change

#

The volcano plot is very flat, cross-validating the results of the MA plot

volcanoPlot(res,alpha=alpha)

4.2.2.3 Plot the adjusted p-value histogram

Which is as expected, over-enriched for adjusted p-values of 1

hist(res$padj,breaks=seq(0,1,.01))

# select genes that are significant
alpha=0.01
l2fc = 0.5
sel <- ! is.na(res$padj) & res$padj <= alpha & abs(res$log2FoldChange) >= l2fc

# list of DE genes that are significant
AZDvsDMSO_in_NPS <- rownames(res[sel,])

4.2.2.4 DE genes data export

write.csv(res,file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/AZDvsDMSO_in_NPS-DEgenes_all.csv")
write.csv(res[sel,],file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/AZDvsDMSO_in_NPS-DEgenes__significant.csv")

4.2.3 Effect of Phosphorus starvation

res <- results(dds.kg,name="condition_6...NS...DMSO_vs_6...NPS...DMSO")

alpha=0.01

4.2.3.1 The log2 fold-change range is also rather tight.

plotMA(res)

4.2.3.2 Plot the log odds vs. log2 fold change

#

The volcano plot is very flat, cross-validating the results of the MA plot

volcanoPlot(res,alpha=alpha)

4.2.3.3 Plot the adjusted p-value histogram

Which is as expected, over-enriched for adjusted p-values of 1

hist(res$padj,breaks=seq(0,1,.01))

# select genes that are significant
alpha=0.01
l2fc = 0.5
sel <- ! is.na(res$padj) & res$padj <= alpha & abs(res$log2FoldChange) >= l2fc

# list of DE genes that are significant
NSvsNPS_in_DMSO <- rownames(res[sel,])

4.2.3.4 DE genes data export

write.csv(res,file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/NSvsNPS_in_DMSO-DEgenes_all.csv")
write.csv(res[sel,],file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/NSvsNPS_in_DMSO-DEgenes__significant.csv")

4.2.4 Effect of Nitrogen starvation

res <- results(dds.kg,name="condition_6...PKS...DMSO_vs_6...NPS...DMSO")

alpha=0.01

4.2.4.1 The log2 fold-change range is also rather tight.

plotMA(res)

4.2.4.2 Plot the log odds vs. log2 fold change

#

The volcano plot is very flat, cross-validating the results of the MA plot

volcanoPlot(res,alpha=alpha)

4.2.4.3 Plot the adjusted p-value histogram

Which is as expected, over-enriched for adjusted p-values of 1

hist(res$padj,breaks=seq(0,1,.01))

# select genes that are significant
alpha=0.01
l2fc = 0.5
sel <- ! is.na(res$padj) & res$padj <= alpha & abs(res$log2FoldChange) >= l2fc

# list of DE genes that are significant
PKSvsNPS_in_DMSO <- rownames(res[sel,])

4.2.4.4 DE genes data export

write.csv(res,file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/PKSvsNPS_in_DMSO-DEgenes_all.csv")
write.csv(res[sel,],file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/APKSvsNPS_in_DMSO-DEgenes__significant.csv")

4.2.5 Effect of sugar starvation

res <- results(dds.kg,name="condition_6...NP...DMSO_vs_6...NPS...DMSO")

alpha=0.01

4.2.5.1 The log2 fold-change range is also rather tight.

plotMA(res)

4.2.5.2 Plot the log odds vs. log2 fold change

#

The volcano plot is very flat, cross-validating the results of the MA plot

volcanoPlot(res,alpha=alpha)

4.2.5.3 Plot the adjusted p-value histogram

Which is as expected, over-enriched for adjusted p-values of 1

hist(res$padj,breaks=seq(0,1,.01))

# select genes that are significant
alpha=0.01
l2fc = 0.5
sel <- ! is.na(res$padj) & res$padj <= alpha & abs(res$log2FoldChange) >= l2fc

# list of DE genes that are significant
NPvsNPS_in_DMSO <- rownames(res[sel,])

4.2.5.4 DE genes data export

write.csv(res,file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/NPvsNPS_in_DMSO-DEgenes_all.csv")
write.csv(res[sel,],file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/NPvsNPS_in_DMSO-DEgenes__significant.csv")

4.2.6 Effect of AZD treatment and Phosphorus starvation compared to NPS_DMSO

res <- results(dds.kg,name="condition_6...NS...AZD_vs_6...NPS...DMSO")

alpha=0.01

4.2.6.1 The log2 fold-change range is also rather tight.

plotMA(res)

4.2.6.2 Plot the log odds vs. log2 fold change

#

The volcano plot is very flat, cross-validating the results of the MA plot

volcanoPlot(res,alpha=alpha)

4.2.6.3 Plot the adjusted p-value histogram

Which is as expected, over-enriched for adjusted p-values of 1

hist(res$padj,breaks=seq(0,1,.01))

# select genes that are significant
alpha=0.01
l2fc = 0.5
sel <- ! is.na(res$padj) & res$padj <= alpha & abs(res$log2FoldChange) >= l2fc

# list of DE genes that are significant
NS.AZD_vs_NPS.DMSO <- rownames(res[sel,])

4.2.6.4 DE genes data export

write.csv(res,file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/NS.AZD_vs_NPS.DMSO-DEgenes_all.csv")
write.csv(res[sel,],file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/NS.AZD_vs_NPS.DMSO-DEgenes__significant.csv")

4.2.7 Venn Diagram of Phosphorus starvation and AZD treatment compared to NPS_DMSO

SixH <- VennDiagram::calculate.overlap(list(NSvsNPS_in_DMSO,
                                            AZDvsDMSO_in_NPS,
                                            NS.AZD_vs_NPS.DMSO))
grid.newpage()
svg("Venn5.svg", width = 7, height = 7)
grid.draw(venn.diagram(list(NSvsNPS_in_DMSO,
                            AZDvsDMSO_in_NPS,
                            NS.AZD_vs_NPS.DMSO),
                       filename=NULL,
                       col=pal[1:3],
                       category.names=c("Pstarv","AZD","Pstarv & AZD")))
dev.off()
## png 
##   2
grid.draw(venn.diagram(list(NSvsNPS_in_DMSO,
                            AZDvsDMSO_in_NPS,
                            NS.AZD_vs_NPS.DMSO),
                       filename=NULL,
                       col=pal[1:3],
                       category.names=c("Pstarv","AZD","Pstarv & AZD")))

4.3 Data normalisation to PKS_DMSO

conditions <- factor(paste(samples$Timepoint,".",samples$Nutrition,".",samples$AZD))

conditions <- relevel(conditions,ref = "6 . PKS . DMSO")

dds.kg <- DESeqDataSetFromMatrix(
    countData = kg,
    colData = data.frame(condition=conditions),
    design = ~ condition)
## converting counts to integer mode
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]

Check the size factors (i.e. the sequencing library size effect)

dds.kg <- estimateSizeFactors(dds.kg)
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]
sizes.kg <- sizeFactors(dds.kg)
# names(sizes.kt) <- colnames(kt)
# not useful sizes.kt already have the right names
pander(sizes.kg)
Table continues below
P11554_202 P11554_203 P11554_204 P11554_205 P11554_206 P11554_207
0.6397 0.8337 0.6737 0.8355 0.9246 1.189
Table continues below
P11554_208 P11554_209 P11554_210 P11554_211 P11554_212 P11554_213
0.9961 0.8304 0.9225 1.362 1.092 0.9275
Table continues below
P11554_214 P11554_215 P11554_216 P11554_217 P11554_218 P11554_219
1.107 1.052 1.063 1.126 1.223 1.402
Table continues below
P11554_220 P11554_221 P11554_222 P11554_223 P11554_224 P11554_225
1.072 1.185 1.325 1.041 1.119 0.6975
P11554_226 P11554_227 P11554_228 P11554_229 P11554_230
1.035 1.072 1.255 0.7207 0.8754
boxplot(sizes.kg, main="Sequencing libraries size factor")
abline(h=1,lty=2,col="gray")

# There is a small variation between the mediane and the value 1.0

4.3.1 Variance Stabilising Transformation

4.3.1.1 Blind

system.time(vsd.kg <- varianceStabilizingTransformation(dds.kg, blind=TRUE))
##    user  system elapsed 
##  13.232   9.312   9.867
vst_blind <- assay(vsd.kg)

#colnames(vst.kt) <- colnames(kt)
#already have the right names
vst_blind <- vst_blind - min(vst_blind) 
# substracts from all values the smaller value, so the smaller value is now 0

Validate the VST

meanSdPlot(vst_blind[rowSums(vst_blind)>0,]) #mean variance stabilized between 0.5 and 1

meanSdPlot(log2(kg+1)) #without Variance Stabilizing Transformation: variation around 1

meanSdPlot(log2(counts(dds.kg,normalized=TRUE)+1)) #normalized read depth: variance is not stabilized ("curvy")

#VST validated: mean variance stabilized around 0.5


write.csv(vst_blind,"/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/Tom-normalizedTo6_PKS_DMSO-data-vst-blind.csv")

#save(kg, vst_blind, file="vst_blind.rda")
save(kg, vst_blind, file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/vst_blind.rda")

4.3.1.2 vst aware (calculated accordingly to samplingDates)

Calculate vst aware with DESeq2 taking the date as a parameter

#

4.3.1.3 Model-aware

system.time(vsd2 <- varianceStabilizingTransformation(dds.kg, blind=FALSE))
##    user  system elapsed 
##  20.848   8.088  18.359
vst_aware <- assay(vsd2)

#colnames(vst.kt) <- colnames(kt)
#already have the right names
vst_aware <- vst_aware - min(vst_aware) 

# substracts from all values the smaller value, so the smaller value is now 0

Validate the VST

meanSdPlot(vst_aware[rowSums(vst_aware)>0,]) #mean variance stabilized between 0 and 0.5

meanSdPlot(log2(kg+1)) #without Variance Stabilizing Transformation: variation around 1

meanSdPlot(log2(counts(dds.kg,normalized=TRUE)+1)) #normalized read depth: variance is not stabilized ("curvy")

# vst_aware validated: mean variance is way lower when vst is calculated "aware of the date factor" compare to before when vst was calculated "blind"
# stabilized around 0.2 instead of 0.5 when vst was blind


write.csv(vst_aware,"/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/Tom-normalizedTo_6_PKS_DMSO-data-vst-aware.csv")
#save(kg, vst_aware, file="vst_aware.rda")
#load("vst_aware.rda")

Differential expression

dds.kg <- DESeq(dds.kg)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]
## final dispersion estimates
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]
## fitting model and testing

which results

resultsNames(dds.kg)
## [1] "Intercept"                                 
## [2] "condition_0...T0...0_vs_6...PKS...DMSO"    
## [3] "condition_6...NP...AZD_vs_6...PKS...DMSO"  
## [4] "condition_6...NP...DMSO_vs_6...PKS...DMSO" 
## [5] "condition_6...NPS...AZD_vs_6...PKS...DMSO" 
## [6] "condition_6...NPS...DMSO_vs_6...PKS...DMSO"
## [7] "condition_6...NS...AZD_vs_6...PKS...DMSO"  
## [8] "condition_6...NS...DMSO_vs_6...PKS...DMSO" 
## [9] "condition_6...PKS...AZD_vs_6...PKS...DMSO"

4.3.2 Effect of AZD treatment in PKS

res <- results(dds.kg,name="condition_6...PKS...AZD_vs_6...PKS...DMSO")

alpha=0.01

4.3.2.1 The log2 fold-change range is also rather tight.

plotMA(res)

4.3.2.2 Plot the log odds vs. log2 fold change

The volcano plot is very flat, cross-validating the results of the MA plot

volcanoPlot(res,alpha=alpha)

4.3.2.3 Plot the adjusted p-value histogram

Which is as expected, over-enriched for adjusted p-values of 1

hist(res$padj,breaks=seq(0,1,.01))

# select genes that are significant
alpha=0.01
l2fc = 0.5
sel <- ! is.na(res$padj) & res$padj <= alpha & abs(res$log2FoldChange) >= l2fc

# list of DE genes that are significant
AZDvsDMSO_in_PKS <- rownames(res[sel,])

4.3.2.4 DE genes data export

write.csv(res,file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/AZDvsDMSO_in_PKS-DEgenes_all.csv")
write.csv(res[sel,],file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/AZDvsDMSO_in_PKS-DEgenes__significant.csv")

4.4 Data normalisation to NS_DMSO

conditions <- factor(paste(samples$Timepoint,".",samples$Nutrition,".",samples$AZD))

conditions <- relevel(conditions,ref = "6 . NS . DMSO")

dds.kg <- DESeqDataSetFromMatrix(
    countData = kg,
    colData = data.frame(condition=conditions),
    design = ~ condition)
## converting counts to integer mode
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]

Check the size factors (i.e. the sequencing library size effect)

dds.kg <- estimateSizeFactors(dds.kg)
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]
sizes.kg <- sizeFactors(dds.kg)
# names(sizes.kt) <- colnames(kt)
# not useful sizes.kt already have the right names
pander(sizes.kg)
Table continues below
P11554_202 P11554_203 P11554_204 P11554_205 P11554_206 P11554_207
0.6397 0.8337 0.6737 0.8355 0.9246 1.189
Table continues below
P11554_208 P11554_209 P11554_210 P11554_211 P11554_212 P11554_213
0.9961 0.8304 0.9225 1.362 1.092 0.9275
Table continues below
P11554_214 P11554_215 P11554_216 P11554_217 P11554_218 P11554_219
1.107 1.052 1.063 1.126 1.223 1.402
Table continues below
P11554_220 P11554_221 P11554_222 P11554_223 P11554_224 P11554_225
1.072 1.185 1.325 1.041 1.119 0.6975
P11554_226 P11554_227 P11554_228 P11554_229 P11554_230
1.035 1.072 1.255 0.7207 0.8754
boxplot(sizes.kg, main="Sequencing libraries size factor")
abline(h=1,lty=2,col="gray")

# There is a small variation between the mediane and the value 1.0

4.4.1 Variance Stabilising Transformation

4.4.1.1 Blind

system.time(vsd.kg <- varianceStabilizingTransformation(dds.kg, blind=TRUE))
##    user  system elapsed 
##  13.668   9.584  10.810
vst_blind <- assay(vsd.kg)

#colnames(vst.kt) <- colnames(kt)
#already have the right names
vst_blind <- vst_blind - min(vst_blind) 
# substracts from all values the smaller value, so the smaller value is now 0

Validate the VST

meanSdPlot(vst_blind[rowSums(vst_blind)>0,]) #mean variance stabilized between 0.5 and 1

meanSdPlot(log2(kg+1)) #without Variance Stabilizing Transformation: variation around 1

meanSdPlot(log2(counts(dds.kg,normalized=TRUE)+1)) #normalized read depth: variance is not stabilized ("curvy")

#VST validated: mean variance stabilized around 0.5


write.csv(vst_blind,"/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/Tom-normalizedTo6_NS_DMSO-data-vst-blind.csv")

#save(kg, vst_blind, file="vst_blind.rda")
save(kg, vst_blind, file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/vst_blind.rda")

4.4.1.2 vst aware (calculated accordingly to samplingDates)

Calculate vst aware with DESeq2 taking the date as a parameter #### Model-aware

system.time(vsd2 <- varianceStabilizingTransformation(dds.kg, blind=FALSE))
##    user  system elapsed 
##  21.636   7.948  19.276
vst_aware <- assay(vsd2)

#colnames(vst.kt) <- colnames(kt)
#already have the right names
vst_aware <- vst_aware - min(vst_aware) 

# substracts from all values the smaller value, so the smaller value is now 0

Validate the VST

meanSdPlot(vst_aware[rowSums(vst_aware)>0,]) #mean variance stabilized between 0 and 0.5

meanSdPlot(log2(kg+1)) #without Variance Stabilizing Transformation: variation around 1

meanSdPlot(log2(counts(dds.kg,normalized=TRUE)+1)) #normalized read depth: variance is not stabilized ("curvy")

# vst_aware validated: mean variance is way lower when vst is calculated "aware of the date factor" compare to before when vst was calculated "blind"
# stabilized around 0.2 instead of 0.5 when vst was blind


write.csv(vst_aware,"/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/Tom-normalizedTo_6_NS_DMSO-data-vst-aware.csv")
#save(kg, vst_aware, file="vst_aware.rda")
#load("vst_aware.rda")

Differential expression

dds.kg <- DESeq(dds.kg)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]
## final dispersion estimates
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]
## fitting model and testing

which results

resultsNames(dds.kg)
## [1] "Intercept"                                
## [2] "condition_0...T0...0_vs_6...NS...DMSO"    
## [3] "condition_6...NP...AZD_vs_6...NS...DMSO"  
## [4] "condition_6...NP...DMSO_vs_6...NS...DMSO" 
## [5] "condition_6...NPS...AZD_vs_6...NS...DMSO" 
## [6] "condition_6...NPS...DMSO_vs_6...NS...DMSO"
## [7] "condition_6...NS...AZD_vs_6...NS...DMSO"  
## [8] "condition_6...PKS...AZD_vs_6...NS...DMSO" 
## [9] "condition_6...PKS...DMSO_vs_6...NS...DMSO"

4.4.2 Effect of AZD treatment in NS medium

res <- results(dds.kg,name="condition_6...NS...AZD_vs_6...NS...DMSO")

alpha=0.01

4.4.2.1 The log2 fold-change range is also rather tight.

plotMA(res)

4.4.2.2 Plot the log odds vs. log2 fold change

The volcano plot is very flat, cross-validating the results of the MA plot

volcanoPlot(res,alpha=alpha)

4.4.2.3 Plot the adjusted p-value histogram

Which is as expected, over-enriched for adjusted p-values of 1

hist(res$padj,breaks=seq(0,1,.01))

# select genes that are significant
alpha=0.01
l2fc = 0.5
sel <- ! is.na(res$padj) & res$padj <= alpha & abs(res$log2FoldChange) >= l2fc

# list of DE genes that are significant
AZDvsDMSO_in_NS <- rownames(res[sel,])

4.4.2.4 DE genes data export

write.csv(res,file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/AZDvsDMSO_in_NS-DEgenes_all.csv")
write.csv(res[sel,],file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/AZDvsDMSO_in_NS-DEgenes__significant.csv")

4.5 Data normalisation to NP_DMSO

conditions <- factor(paste(samples$Timepoint,".",samples$Nutrition,".",samples$AZD))

conditions <- relevel(conditions,ref = "6 . NP . DMSO")

dds.kg <- DESeqDataSetFromMatrix(
    countData = kg,
    colData = data.frame(condition=conditions),
    design = ~ condition)
## converting counts to integer mode
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]

Check the size factors (i.e. the sequencing library size effect)

dds.kg <- estimateSizeFactors(dds.kg)
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]
sizes.kg <- sizeFactors(dds.kg)
# names(sizes.kt) <- colnames(kt)
# not useful sizes.kt already have the right names
pander(sizes.kg)
Table continues below
P11554_202 P11554_203 P11554_204 P11554_205 P11554_206 P11554_207
0.6397 0.8337 0.6737 0.8355 0.9246 1.189
Table continues below
P11554_208 P11554_209 P11554_210 P11554_211 P11554_212 P11554_213
0.9961 0.8304 0.9225 1.362 1.092 0.9275
Table continues below
P11554_214 P11554_215 P11554_216 P11554_217 P11554_218 P11554_219
1.107 1.052 1.063 1.126 1.223 1.402
Table continues below
P11554_220 P11554_221 P11554_222 P11554_223 P11554_224 P11554_225
1.072 1.185 1.325 1.041 1.119 0.6975
P11554_226 P11554_227 P11554_228 P11554_229 P11554_230
1.035 1.072 1.255 0.7207 0.8754
boxplot(sizes.kg, main="Sequencing libraries size factor")
abline(h=1,lty=2,col="gray")

# There is a small variation between the mediane and the value 1.0

4.5.1 Variance Stabilising Transformation

4.5.1.1 Blind

system.time(vsd.kg <- varianceStabilizingTransformation(dds.kg, blind=TRUE))
##    user  system elapsed 
##  13.460   9.528  10.220
vst_blind <- assay(vsd.kg)

#colnames(vst.kt) <- colnames(kt)
#already have the right names
vst_blind <- vst_blind - min(vst_blind) 
# substracts from all values the smaller value, so the smaller value is now 0

Validate the VST

meanSdPlot(vst_blind[rowSums(vst_blind)>0,]) #mean variance stabilized between 0.5 and 1

meanSdPlot(log2(kg+1)) #without Variance Stabilizing Transformation: variation around 1

meanSdPlot(log2(counts(dds.kg,normalized=TRUE)+1)) #normalized read depth: variance is not stabilized ("curvy")

#VST validated: mean variance stabilized around 0.5


write.csv(vst_blind,"/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/Tom-normalizedTo6_NP_DMSO-data-vst-blind.csv")

#save(kg, vst_blind, file="vst_blind.rda")
save(kg, vst_blind, file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/vst_blind.rda")

4.5.1.2 vst aware (calculated accordingly to samplingDates)

Calculate vst aware with DESeq2 taking the date as a parameter

#

4.5.1.3 Model-aware

system.time(vsd2 <- varianceStabilizingTransformation(dds.kg, blind=FALSE))
##    user  system elapsed 
##  20.472   7.424  18.049
vst_aware <- assay(vsd2)

#colnames(vst.kt) <- colnames(kt)
#already have the right names
vst_aware <- vst_aware - min(vst_aware) 

# substracts from all values the smaller value, so the smaller value is now 0

Validate the VST

meanSdPlot(vst_aware[rowSums(vst_aware)>0,]) #mean variance stabilized between 0 and 0.5

meanSdPlot(log2(kg+1)) #without Variance Stabilizing Transformation: variation around 1

meanSdPlot(log2(counts(dds.kg,normalized=TRUE)+1)) #normalized read depth: variance is not stabilized ("curvy")

# vst_aware validated: mean variance is way lower when vst is calculated "aware of the date factor" compare to before when vst was calculated "blind"
# stabilized around 0.2 instead of 0.5 when vst was blind


write.csv(vst_aware,"/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/Tom-normalizedTo_6_NP_DMSO-data-vst-aware.csv")
#save(kg, vst_aware, file="vst_aware.rda")
#load("vst_aware.rda")

Differential expression

dds.kg <- DESeq(dds.kg)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]
## final dispersion estimates
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not an warning or error]
## fitting model and testing

which results

resultsNames(dds.kg)
## [1] "Intercept"                                
## [2] "condition_0...T0...0_vs_6...NP...DMSO"    
## [3] "condition_6...NP...AZD_vs_6...NP...DMSO"  
## [4] "condition_6...NPS...AZD_vs_6...NP...DMSO" 
## [5] "condition_6...NPS...DMSO_vs_6...NP...DMSO"
## [6] "condition_6...NS...AZD_vs_6...NP...DMSO"  
## [7] "condition_6...NS...DMSO_vs_6...NP...DMSO" 
## [8] "condition_6...PKS...AZD_vs_6...NP...DMSO" 
## [9] "condition_6...PKS...DMSO_vs_6...NP...DMSO"

4.5.2 Effect of AZD treatment in NP medium

res <- results(dds.kg,name="condition_6...NP...AZD_vs_6...NP...DMSO")

alpha=0.01

4.5.2.1 The log2 fold-change range is also rather tight.

plotMA(res)

4.5.2.2 Plot the log odds vs. log2 fold change

The volcano plot is very flat, cross-validating the results of the MA plot

volcanoPlot(res,alpha=alpha)

4.5.2.3 Plot the adjusted p-value histogram

Which is as expected, over-enriched for adjusted p-values of 1

hist(res$padj,breaks=seq(0,1,.01))

# select genes that are significant
alpha=0.01
l2fc = 0.5
sel <- ! is.na(res$padj) & res$padj <= alpha & abs(res$log2FoldChange) >= l2fc

# list of DE genes that are significant
AZDvsDMSO_in_NP <- rownames(res[sel,])

4.5.2.4 DE genes data export

write.csv(res,file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/AZDvsDMSO_in_NP-DEgenes_all.csv")
write.csv(res[sel,],file="/mnt/picea/projects/arabidopsis/jhanson/arabidopsis-nutrition-TOR/AZDvsDMSO_in_NP-DEgenes__significant.csv")

4.6 VennDiagram of +/- AZD in different nutritional conditions

SixH <- VennDiagram::calculate.overlap(list(AZDvsDMSO_in_NPS,
                                            AZDvsDMSO_in_PKS,
                                            AZDvsDMSO_in_NS,
                                            AZDvsDMSO_in_NP))
grid.newpage()
svg("Venn4.svg", width = 7, height = 7)
grid.draw(venn.diagram(list(AZDvsDMSO_in_NPS,
                            AZDvsDMSO_in_PKS,
                            AZDvsDMSO_in_NS,
                            AZDvsDMSO_in_NP),
                       filename=NULL,
                       col=pal[1:4],
                       category.names=c("AZD in NPS","AZD in PKS","AZD in NS","AZD in NP")))
dev.off()
## png 
##   2
grid.draw(venn.diagram(list(AZDvsDMSO_in_NPS,
                            AZDvsDMSO_in_PKS,
                            AZDvsDMSO_in_NS,
                            AZDvsDMSO_in_NP),
                       filename=NULL,
                       col=pal[1:4],
                       category.names=c("AZD in NPS","AZD in PKS","AZD in NS","AZD in NP")))

5 Session Info

## R version 3.5.1 (2018-07-02)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
## 
## Matrix products: default
## BLAS: /usr/lib/openblas-base/libblas.so.3
## LAPACK: /usr/lib/libopenblasp-r0.2.18.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] hexbin_1.27.2               limma_3.38.3               
##  [3] VennDiagram_1.6.20          futile.logger_1.4.3        
##  [5] vsn_3.50.0                  tximport_1.11.7            
##  [7] scatterplot3d_0.3-41        RColorBrewer_1.1-2         
##  [9] pander_0.6.3                LSD_4.0-0                  
## [11] gplots_3.0.1.1              DESeq2_1.22.2              
## [13] SummarizedExperiment_1.12.0 DelayedArray_0.8.0         
## [15] BiocParallel_1.16.6         matrixStats_0.54.0         
## [17] Biobase_2.42.0              GenomicRanges_1.34.0       
## [19] GenomeInfoDb_1.18.2         IRanges_2.16.0             
## [21] S4Vectors_0.20.1            BiocGenerics_0.28.0        
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-6           bit64_0.9-7            tools_3.5.1           
##  [4] backports_1.1.4        R6_2.4.0               affyio_1.52.0         
##  [7] rpart_4.1-15           KernSmooth_2.23-15     Hmisc_4.2-0           
## [10] DBI_1.0.0              lazyeval_0.2.2         colorspace_1.4-1      
## [13] nnet_7.3-12            tidyselect_0.2.5       gridExtra_2.3         
## [16] bit_1.1-14             compiler_3.5.1         preprocessCore_1.44.0 
## [19] formatR_1.6            htmlTable_1.13.1       labeling_0.3          
## [22] caTools_1.17.1.2       scales_1.0.0           checkmate_1.9.1       
## [25] genefilter_1.64.0      affy_1.60.0            stringr_1.4.0         
## [28] digest_0.6.18          foreign_0.8-71         rmarkdown_1.12        
## [31] XVector_0.22.0         base64enc_0.1-3        pkgconfig_2.0.2       
## [34] htmltools_0.3.6        highr_0.8              htmlwidgets_1.3       
## [37] rlang_0.3.4            rstudioapi_0.10        RSQLite_2.1.1         
## [40] gtools_3.8.1           acepack_1.4.1          dplyr_0.8.0.1         
## [43] RCurl_1.95-4.12        magrittr_1.5           GenomeInfoDbData_1.2.0
## [46] Formula_1.2-3          Matrix_1.2-17          Rcpp_1.0.1            
## [49] munsell_0.5.0          stringi_1.4.3          yaml_2.2.0            
## [52] zlibbioc_1.28.0        plyr_1.8.4             blob_1.1.1            
## [55] gdata_2.18.0           crayon_1.3.4           lattice_0.20-38       
## [58] splines_3.5.1          annotate_1.60.1        locfit_1.5-9.1        
## [61] knitr_1.22             pillar_1.3.1           geneplotter_1.60.0    
## [64] futile.options_1.0.1   XML_3.98-1.19          glue_1.3.1            
## [67] evaluate_0.13          latticeExtra_0.6-28    lambda.r_1.2.3        
## [70] data.table_1.12.2      BiocManager_1.30.4     gtable_0.3.0          
## [73] purrr_0.3.2            assertthat_0.2.1       ggplot2_3.1.1         
## [76] xfun_0.6               xtable_1.8-4           survival_2.44-1.1     
## [79] tibble_2.1.1           AnnotationDbi_1.44.0   memoise_1.1.0         
## [82] cluster_2.0.8